########## Supplementary materials 1 #############

## This code replicates all results from section 1 of the supplementary materials
## This includes:
## 1. Descriptive statistics for polling stations and their characteristics
## 2. Tests of sample representativeness
## 3. Descriptive statistics from the Afrobarometer data

## Clear the workspace and load required packages

rm(list=ls())

library(tidyverse)
library(reshape2)
library(fixest)

#### POLLING STATION DATA

# Polling-station inputs, read from the current working directory:
#   ps_info     - availability data used for the tables and regressions below
#   ps_covs_raw - raw covariates
ps_info     <- readRDS(file = "ps_info.rds")
ps_covs_raw <- readRDS(file = "ps_covs_raw.rds")

###### Table 1 (station availability by race)  #######

# Table 1: number of polling stations flagged as available ("1") for each
# race. One row per race, in the order President, Parliament, Council.
availability_cols <- c(President  = "available_pres",
                       Parliament = "available_parl",
                       Council    = "available_council")

ps_available_tab <- tibble(
  Race = names(availability_cols),
  # `%in%` counts NA flags as "not available" (as filter() would), and a race
  # with no available stations is reported as 0 instead of losing its row.
  Count = vapply(availability_cols,
                 function(flag_col) sum(ps_info[[flag_col]] %in% "1"),
                 integer(1),
                 USE.NAMES = FALSE)
) %>%
  mutate(
    # Share of 2014 polling stations; 4445 is hard-coded as the denominator
    # (presumably the total number of 2014 stations -- TODO confirm).
    `Share (%)` = (Count / 4445) * 100,
    # Twice the station count (presumably station-level observations across
    # the two elections in the full panel -- TODO confirm).
    Panel = Count * 2
  )


# Render Table 1 as a LaTeX table with a bold header row.
# knitr and kableExtra are not attached at the top of this script, so the
# functions are namespace-qualified; otherwise kable()/row_spec()/
# kable_styling() are not found in a fresh session.
knitr::kable(ps_available_tab, format = "latex", booktabs = TRUE,
             digits = 2,
             caption = "Available 2014 polling stations, by race") %>%
  kableExtra::row_spec(0, bold = TRUE) %>%  # row 0 is the header row
  kableExtra::kable_styling(latex_options = "hold_position",
                            font_size = 10)


######## FIGURE 1 (distribution of polling station characteristics) #########

# Long-format covariate data for Figure 1: one row per station x covariate.
# The column order in select() fixes the factor levels of `variable` produced
# by melt(), and therefore the panel order in the figure.
ps_covs <- ps_covs_raw %>%
  mutate(
    pop_density14 = log(pop_density14 + 1),
    # NOTE(review): unlike population density, nightlights is logged without
    # adding 1, so any zero values become -Inf -- confirm this is intended.
    nightlights_2014 = log(nightlights_2014),
    poverty11 = poverty11 * 100
  ) %>%
  dplyr::select(
    ps_id,
    `Elevation (m)` = elevation,
    `Distance to road (km)` = distance_road_2016,
    `Dependency ratio` = dep_ratio,
    `Poverty (%)` = poverty11,
    `Nightlights (log)` = nightlights_2014,
    `Population density (log)` = pop_density14
  ) %>%
  melt(id.vars = "ps_id")

# Figure 1: histogram of each covariate in its own panel, with free axes
# because the covariates are on different scales.
ggplot(data = ps_covs, mapping = aes(x = value)) +
  geom_histogram(bins = 20, colour = "black", fill = "darkgrey") +
  facet_wrap(~variable, scales = "free") +
  labs(y = "Count") +
  theme_bw() +
  theme(axis.title.x = element_blank())

##### SAMPLE REPRESENTATIVENESS (Figures 2-3, Tables 2-10) ####

### Run regressions to check differences in outcomes relative to inclusion in the sample

###### With ward FEs

# Left-hand sides in fixest's multiple-estimation syntax: c(y1, y2, ...) fits
# one model per outcome, in the order listed. The labelling and table code
# further down relies on this order.
political_lhs <- "c(dpp_share, rejected_share, total_cast)"
demographic_lhs <- paste0("c(pop_density14, poverty11, elevation, ",
                          "distance_road_2016, dep_ratio, nightlights_2014)")

# Regress every outcome in `lhs` on one availability indicator, absorbing the
# given fixed effect, using the polling-station data in `ps_info`.
fit_balance <- function(lhs, availability, fixed_effect) {
  balance_fml <- as.formula(paste(lhs, "~", availability, "|", fixed_effect))
  feols(balance_fml, data = ps_info)
}

###### With ward FEs

# Political outcomes
bal1.1 <- fit_balance(political_lhs, "available_pres", "ward")
bal1.2 <- fit_balance(political_lhs, "available_parl", "ward")
bal1.3 <- fit_balance(political_lhs, "available_council", "ward")

# Demographic / geographic outcomes
bal2.1 <- fit_balance(demographic_lhs, "available_pres", "ward")
bal2.2 <- fit_balance(demographic_lhs, "available_parl", "ward")
bal2.3 <- fit_balance(demographic_lhs, "available_council", "ward")

###### With constituency FEs

# Political outcomes
bal3.1 <- fit_balance(political_lhs, "available_pres", "constituency")
bal3.2 <- fit_balance(political_lhs, "available_parl", "constituency")
bal3.3 <- fit_balance(political_lhs, "available_council", "constituency")

# Demographic / geographic outcomes
bal4.1 <- fit_balance(demographic_lhs, "available_pres", "constituency")
bal4.2 <- fit_balance(demographic_lhs, "available_parl", "constituency")
bal4.3 <- fit_balance(demographic_lhs, "available_council", "constituency")

## Save results in data.frame to make plots

# Plot labels for the outcomes, in the same order as the left-hand sides of
# the balance regressions.
political_labels <- c("DPP share (%)",
                      "Rejected share (%)",
                      "Total cast (count)")
demographic_labels <- c("Population density",
                        "Poverty",
                        "Elevation",
                        "Distance to main road",
                        "Dependency ratio",
                        "Nightlights")

# All fitted models: ward-FE models first (political, then demographic),
# followed by the constituency-FE models in the same arrangement.
bal_list <- c(bal1.1, bal1.2, bal1.3,
              bal2.1, bal2.2, bal2.3,
              bal3.1, bal3.2, bal3.3,
              bal4.1, bal4.2, bal4.3)

# One row per model coefficient. `outcome` and `FEs` are assigned purely by
# position, so they depend on the ordering of `bal_list` above:
# per FE type, 3 races x 3 political + 3 races x 6 demographic = 27 rows.
bal_df <- map_df(bal_list, broom::tidy, .id = "model") %>%
  filter(term != "(Intercept)") %>%
  mutate(
    conf.low = estimate - 1.96 * std.error,
    conf.high = estimate + 1.96 * std.error,
    level = case_when(
      term == "available_pres" ~ "President",
      term == "available_parl" ~ "Parliament",
      TRUE ~ "Council"
    ),
    outcome = rep(c(rep(political_labels, times = 3),
                    rep(demographic_labels, times = 3)),
                  times = 2),
    FEs = rep(c("Ward", "Constituency"), each = 27)
  )

# Split the estimates into the political and demographic sets used by the
# two coefficient plots.
bal_df_political <- filter(bal_df, outcome %in% political_labels)

bal_df_demographic <- filter(bal_df, !outcome %in% political_labels)

#### FIGURE 2

# Figure 2: coefficient plot of availability on the political outcomes, one
# panel per outcome, with 95% intervals and points dodged by fixed-effect type.
# A theme(panel.border = ...) call that used to precede theme_bw() was dropped:
# theme_bw() is a complete theme and replaced it, so it never had any effect.
ggplot(data = bal_df_political, aes(x = estimate, y = level)) +
  facet_wrap(~outcome, scales = "free_x") +
  geom_vline(xintercept = 0, linetype = 2) +
  # height = 0 draws plain intervals with no end caps. It is a constant, so it
  # is set outside aes(); the former `width` entry was not an errorbarh
  # aesthetic and only produced an "unknown aesthetics" warning.
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, colour = FEs),
                 height = 0,
                 position = position_dodge(width = 0.5)) +
  geom_point(aes(colour = FEs),
             position = position_dodge(width = 0.5),
             alpha = 0.7) +
  theme_bw() +
  theme(legend.position = "bottom",
        axis.title.y = element_blank()) +
  xlab("Difference (% / count)") +
  ggtitle("a) Political differences") +
  scale_colour_manual(values = c("darkorange", "darkblue"))

### FIGURE 3

# Figure 3: coefficient plot of availability on the demographic outcomes, one
# panel per outcome (two rows), with 95% intervals and points dodged by
# fixed-effect type.
# A theme(panel.border = ...) call that used to precede theme_bw() was dropped:
# theme_bw() is a complete theme and replaced it, so it never had any effect.
ggplot(data = bal_df_demographic, aes(x = estimate, y = level)) +
  facet_wrap(~outcome, scales = "free_x", nrow = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  # height = 0 draws plain intervals with no end caps. It is a constant, so it
  # is set outside aes(); the former `width` entry was not an errorbarh
  # aesthetic and only produced an "unknown aesthetics" warning.
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, colour = FEs),
                 height = 0,
                 position = position_dodge(width = 0.5)) +
  geom_point(aes(colour = FEs),
             position = position_dodge(width = 0.5),
             alpha = 0.7) +
  theme_bw() +
  theme(legend.position = "bottom",
        axis.title.y = element_blank()) +
  xlab("Difference (standard deviations) ") +
  ggtitle("b) Demographic differences") +
  scale_colour_manual(values = c("darkorange", "darkblue"))

## Results tables 2-10

# Display labels used by etable() in place of the raw variable names.
balance_labels <- c(
  # Outcomes
  dpp_share = "Ruling party vote share (%)",
  rejected_share = "Ballot rejection rate (%)",
  total_cast = "Total votes cast (count)",
  elevation = "Elevation (standardised)",
  poverty11 = "Poverty (standardised)",
  distance_road_2016 = "Distance to main road (standardised)",
  dep_ratio = "Dependency ratio (standardised)",
  nightlights_2014 = "Nightlights (standardised)",
  pop_density14 = "Population density (standardised)",
  # Availability indicators
  available_pres = "Available (President)",
  available_parl = "Available (Parliament)",
  available_council = "Available (Council)",
  # Fixed effects
  ward = "Ward",
  constituency = "Constituency"
)

setFixest_dict(balance_labels)

# Balance tables, one per outcome. Each table has six columns: for President,
# Parliament and Council availability in turn, the ward-FE model followed by
# the constituency-FE model. The index in [k] picks the k-th outcome in the
# order given on the left-hand side of the corresponding feols() call.
# tex=TRUE requests LaTeX output.

# Political outcomes (bal1.* = ward FEs, bal3.* = constituency FEs)

# Outcome 1: dpp_share
etable(bal1.1[1], bal3.1[1], 
       bal1.2[1], bal3.2[1],
       bal1.3[1], bal3.3[1], 
       tex=TRUE, 
       fontsize = "scriptsize", 
       title = "Balance tests for missing 2014 stations: Ruling party vote share")

# Outcome 2: rejected_share
etable(bal1.1[2], bal3.1[2], 
       bal1.2[2], bal3.2[2],
       bal1.3[2], bal3.3[2], 
       tex=TRUE, 
       fontsize = "scriptsize", 
       title = "Balance tests for missing 2014 stations: Ballot rejection rate")

# Outcome 3: total_cast
etable(bal1.1[3], bal3.1[3], 
       bal1.2[3], bal3.2[3],
       bal1.3[3], bal3.3[3], 
       tex=TRUE, 
       fontsize = "scriptsize", 
       title = "Balance tests for missing 2014 stations: Total votes cast")

# Demographic outcomes (bal2.* = ward FEs, bal4.* = constituency FEs)

# Outcome 1: pop_density14
etable(bal2.1[1], bal4.1[1], 
       bal2.2[1], bal4.2[1],
       bal2.3[1], bal4.3[1], 
       tex=TRUE, 
       fontsize = "scriptsize", 
       title = "Balance tests for missing 2014 stations: Population density")

# Outcome 2: poverty11
etable(bal2.1[2], bal4.1[2], 
       bal2.2[2], bal4.2[2],
       bal2.3[2], bal4.3[2], 
       tex=TRUE, 
       fontsize = "scriptsize", 
       title = "Balance tests for missing 2014 stations: Poverty")

# Outcome 3: elevation
etable(bal2.1[3], bal4.1[3], 
       bal2.2[3], bal4.2[3],
       bal2.3[3], bal4.3[3], 
       tex=TRUE, 
       fontsize = "scriptsize", 
       title = "Balance tests for missing 2014 stations: Elevation")

# Outcome 4: distance_road_2016
etable(bal2.1[4], bal4.1[4], 
       bal2.2[4], bal4.2[4],
       bal2.3[4], bal4.3[4], 
       tex=TRUE, 
       fontsize = "scriptsize", 
       title = "Balance tests for missing 2014 stations: Distance to main road")

# Outcome 5: dep_ratio
etable(bal2.1[5], bal4.1[5], 
       bal2.2[5], bal4.2[5],
       bal2.3[5], bal4.3[5], 
       tex=TRUE, 
       fontsize = "scriptsize", 
       title = "Balance tests for missing 2014 stations: Dependency ratio")

# Outcome 6: nightlights_2014
etable(bal2.1[6], bal4.1[6], 
       bal2.2[6], bal4.2[6],
       bal2.3[6], bal4.3[6], 
       tex=TRUE, 
       fontsize = "scriptsize", 
       title = "Balance tests for missing 2014 stations: Nightlights")

#### COVERAGE DATA ####

## Table 12 also features in main text. Code is in file "background_info.R"

#### AFROBAROMETER DATA ####

# Drop every object created by the polling-station sections before loading
# the Afrobarometer data (attached packages are unaffected).
rm(list = ls())
AB_malawi <- readRDS(file = "AB_malawi.rds")

#### FIGURE 4 ######

# Left-to-right order of the bars on the x axis.
group_order <- c("Control", "Always covered", "Enter coverage")

# Bar chart of the number of rows in each `group`.
ggplot(data = AB_malawi,
       mapping = aes(x = factor(group, levels = group_order),
                     fill = group)) +
  geom_bar(colour = "black") +
  scale_fill_manual(values = c("darkblue", "darkgrey", "darkorange")) +
  labs(y = "Count") +
  theme_bw() +
  theme(legend.title = element_blank(),
        axis.title.x = element_blank(),
        legend.position = "bottom")





